*Replication file 00a_RF_tuning_US
*Article: Counterfactual Coercion: Could harsher sanctions against Russia have prevented the worst?
*Authors: Thies Niemeier, Gerald Schneider

***************************************************************
***US***
***************************************************************

* Fix the random-number seed before any estimation is run
set seed 1234

* Load the data and keep only rows with sender "US" that are flagged
* as potentially sanctioned countries
use "Dataset.dta", clear
keep if sender == "US" & pot_sanctioned_countries == 1

* Declare the panel structure: ccodecow is the panel id, year the time variable
xtset ccodecow year

* Log of (1 + oil/gas value); the +1 keeps zero values defined
generate ln_oil_gas_value_2014 = log(1 + oil_gas_value_2014)

* Sender-specific covariates, copied to generic "sender_*" names
generate sender_colony = US_colony
generate sender_trade = ln_US_Trade_COW

* 1 if the EU threatened or imposed, 0 otherwise (a logical expression is 0/1)
generate sender_additional = (threatEU == 1 | impositionEU == 1)
* 1 if the US threatened without imposing, 0 otherwise
generate only_threat = (threatUS == 1 & impositionUS == 0)

* Coup indicator: coup1 value 1 is recoded to 0 and value 2 to 1;
* every other value (including missing) is left unchanged
generate coup_dummy = coup1
recode coup_dummy (1 = 0) (2 = 1)

* Outcome: sanction_dyad, overridden with 1 wherever threat_dyad equals 1
generate sanction_threat = cond(threat_dyad == 1, 1, sanction_dyad)
tabulate sanction_threat

* Outcome copies restricted to the training years (before 2009) and the
* test years (2009 onwards); missing outside the respective period
generate sanction_train = sanction_threat if year < 2009
generate sanction_test = sanction_threat if year >= 2009

* One-year lags of the time-varying covariates.
* A lag is filled in only when the previous row of the same ccodecow is exactly
* one year earlier; otherwise the lagged value is left missing.
sort ccodecow year

local lag_sources v2x_polyarchy gd_ptss coup_dummy one_sided_violence ///
    conflict mid_terr_integrity ln_GDPpc_imputed sender_trade ///
    ln_oil_gas_value_2014 defense_alliance

foreach src of local lag_sources {
    * The lagged name is "l_" + source name; the "_2014" suffix of the
    * oil/gas variable is dropped, giving l_ln_oil_gas_value
    local stem : subinstr local src "_2014" ""
    by ccodecow: generate l_`stem' = `src'[_n-1] if year == year[_n-1] + 1
}

* One indicator variable (pol_terr1, pol_terr2, ...) per observed value of l_gd_ptss
tabulate l_gd_ptss, generate(pol_terr)


** Imposition
* Tuning number of iterations of rf model
* Order the data so that every training row (year < 2009, u == 0) comes before
* every test row (year >= 2009, u == 1); the -in- ranges below rely on this.
gen u=0
replace u=1 if year >= 2009
* -stable- keeps the current within-group order. A plain -sort- orders tied
* observations at random, so the row order passed to rforest could differ
* from run to run.
sort u, stable

* Split boundaries and recall denominators are taken from the data rather than
* hard-coded, so they stay correct if the estimation sample changes.
count if u == 0
local n_train = r(N)
local first_test = `n_train' + 1
count if sanction_test == 1
local pos_test = r(N)
count if sanction_train == 1
local pos_train = r(N)

* Result columns: row j holds the results of the j-th forest
gen out_of_bag_error1 = .
gen training_error = .
gen recall_iter = .
gen recall_training_sample_iter = .
gen iter1 = .
local j = 0

forvalues i = 100(200)1500 {
    local j = `j' + 1
    * Fit on the training rows only, with `i' iterations and one variable per split
    rforest sanction_threat l_v2x_polyarchy pol_terr* l_coup_dummy ///
        l_one_sided_violence l_conflict l_mid_terr_integrity ///
        l_ln_GDPpc_imputed l_sender_trade l_ln_oil_gas_value ///
        sender_colony l_defense_alliance in 1/`n_train', ///
        type(class) iter(`i') numvars(1)
    replace iter1 = `i' in `j'
    replace out_of_bag_error1 = `e(OOB_Error)' in `j'

    * Recall on the test rows: correctly predicted positives / all test positives
    predict p in `first_test'/l
    count if p==1 & sanction_test == 1
    replace recall_iter = r(N)/`pos_test' in `j'

    * Recall on the training rows
    predict p2 in 1/`n_train'
    count if p2==1 & sanction_train == 1
    replace recall_training_sample_iter = r(N)/`pos_train' in `j'

    * e(error_rate) is read after the prediction on the training rows,
    * so this is the training-sample error rate
    replace training_error = `e(error_rate)' in `j'
    drop p p2
}

*Additional tuning based on recall rate, as outcome variable is imbalanced.
label var recall_iter "Recall Test Sample"
label var recall_training_sample_iter "Recall Training Sample"
scatter recall_iter recall_training_sample_iter iter1

graph export "Supplemental_Material\Random_Forest_Setup\RF_US_tuning_pot_sanctions_iterations_Recall_Training_Test.png", as(png) name("Graph") replace

* Tuning based on error rates (out-of-bag error and training-sample error)
label var out_of_bag_error1 "Out of Bag Error"
* Label corrected: the variable holds the training-sample error rate,
* not a test-sample accuracy
label var training_error "Error Rate Training Sample"
scatter out_of_bag_error1 training_error iter1
graph export "Supplemental_Material\Random_Forest_Setup\RF_US_tuning_pot_sanctions_iterations_Accuracy.png", as(png) name("Graph") replace
* No benefit to increasing beyond 1500 iterations

* Clear the result columns of the previous tuning step before reusing their names
drop out_of_bag_error1 training_error recall_iter recall_training_sample_iter

* Tuning number of variables to include in model
* The data are expected to be ordered with all training rows (u == 0) before
* the test rows (u == 1). Split boundaries and recall denominators are taken
* from the data rather than hard-coded, so they stay correct if the estimation
* sample changes.
count if u == 0
local n_train = r(N)
local first_test = `n_train' + 1
count if sanction_test == 1
local pos_test = r(N)
count if sanction_train == 1
local pos_train = r(N)

* Result columns: row j holds the results of the j-th forest
gen out_of_bag_error1 = .
gen training_error = .
gen nvars = .
gen recall_iter = .
gen recall_training_sample_iter = .
local j = 0

forvalues i = 1(1)15 {
    local j = `j' + 1
    * Fit on the training rows only, with 1500 iterations and `i' variables per split
    rforest sanction_threat l_v2x_polyarchy pol_terr* l_coup_dummy ///
        l_one_sided_violence l_conflict l_mid_terr_integrity ///
        l_ln_GDPpc_imputed l_sender_trade l_ln_oil_gas_value ///
        sender_colony l_defense_alliance in 1/`n_train', ///
        type(class) iter(1500) numvars(`i')
    replace nvars = `i' in `j'
    replace out_of_bag_error1 = `e(OOB_Error)' in `j'

    * Recall on the test rows: correctly predicted positives / all test positives
    predict p in `first_test'/l
    count if p==1 & sanction_test == 1
    replace recall_iter = r(N)/`pos_test' in `j'

    * Recall on the training rows
    predict p2 in 1/`n_train'
    count if p2==1 & sanction_train == 1
    replace recall_training_sample_iter = r(N)/`pos_train' in `j'

    * e(error_rate) is read after the prediction on the training rows,
    * so this is the training-sample error rate
    replace training_error = `e(error_rate)' in `j'
    drop p p2
}

*Additional tuning based on recall rate, as outcome variable is imbalanced.
label var recall_iter "Recall Test Sample"
label var recall_training_sample_iter "Recall Training Sample"
scatter recall_iter recall_training_sample_iter nvars
graph export "Supplemental_Material\Random_Forest_Setup\RF_US_tuning_pot_sanctions_numvars_Recall_Training_Test.png", as(png) name("Graph") replace

* Error rates (out-of-bag error and training-sample error)
label var out_of_bag_error1 "Out of Bag Error"
* Label corrected: the variable holds the training-sample error rate,
* not a test-sample accuracy
label var training_error "Error Rate Training Sample"
scatter out_of_bag_error1 training_error nvars
graph export "Supplemental_Material\Random_Forest_Setup\RF_US_tuning_pot_sanctions_numvars_Accuracy.png", as(png) name("Graph") replace
* Accuracy decreases until the end, so include all 15 vars.
* NOTE(review): the plotted series are error rates; presumably it is the error
* rate that decreases up to 15 variables - confirm against the graph.
